#Andrew Gooch
#April 11, 2017
#Ripping Yarn, Political Communications
#Replication R file 2: weighting data from first experiment

#This R file recodes treatment variables and demographic variables, and then uses the demos to weight the data using the survey package.

#required packages
library(arm)
library(ggplot2)
library(grid)
library(gridExtra)
library(foreign)
library(survey)



######################
### MARCH
######################

# Load the March 2015 wave from the Stata file.
# The path below is machine-specific: change it to wherever 315.dta lives on your machine.
Mar15 <- read.dta("C:/Users/agooch/Dropbox/Working papers/Ripping Yarn/Data/315.dta")

# Sanity check: treatment assignment by survey month/year.
# The columns are reached through with() instead of attach(): attach() places a
# snapshot copy of the columns on the search path, which silently goes stale once
# the recodes below start modifying Mar15. Every other statement in this script
# already refers to columns as Mar15$<name>.
with(Mar15, table(Gtreatment, Gmonthyear))

##########################
#RECODING
##########################

# List the columns available before recoding.
names(Mar15)

# Story-treatment indicator built from the numeric code of Gtreatment:
#   code 1 (control) -> 0, code 2 (story) -> 1.
# Any other code, and NA, is carried over unchanged.
table(Mar15$Gtreatment)
Mar15$argnum <- as.numeric(Mar15$Gtreatment)
table(Mar15$argnum)
Mar15$PerStoryDummy <- local({
  arm_code <- Mar15$argnum
  dummy <- arm_code
  dummy[which(arm_code == 1)] <- 0
  dummy[which(arm_code == 2)] <- 1
  dummy
})
# Cross-tab of the indicator against the raw Gfavor responses.
table(Mar15$PerStoryDummy, Mar15$Gfavor)

# Age: collapse the five Gage brackets into the three groups used for weighting.
#   Gage 1 (18-24)                      -> 1
#   Gage 2, 3, 4 (25-34, 35-44, 45-64)  -> 2
#   Gage 5 (65+)                        -> 3
# Values outside 1-5 (including NA) are left as they are.
table(Mar15$Gage)
Mar15$Gage2 <- local({
  bracket <- Mar15$Gage
  grouped <- bracket
  grouped[bracket == 1] <- 1
  grouped[bracket %in% c(2, 3, 4)] <- 2
  grouped[bracket == 5] <- 3
  grouped
})
# Verify the collapse against the original brackets.
table(Mar15$Gage2, Mar15$Gage)

# African-American indicator derived from Grace:
#   Grace 2 -> 1 (black); Grace 1, 3, 4 -> 0.
# Values outside 1-4 (including NA) are carried over unchanged.
table(Mar15$Grace)
Mar15$black <- Mar15$Grace
Mar15$black[Mar15$Grace == 2] <- 1 #black
Mar15$black[Mar15$Grace %in% c(1, 3, 4)] <- 0
# Verify the recode against its source variable. This previously cross-tabbed
# against Gage (a copy-paste slip), which cannot confirm a race recode; the
# white and hispanic recodes are checked against Grace in the same way.
table(Mar15$black, Mar15$Grace)

# White indicator derived from Grace:
#   Grace 1 -> 1 (white); Grace 2, 3, 4 -> 0.
# Values outside 1-4 (including NA) are carried over unchanged.
table(Mar15$Grace)
Mar15$white <- local({
  race_code <- Mar15$Grace
  flag <- race_code
  flag[race_code == 1] <- 1
  flag[race_code %in% c(2, 3, 4)] <- 0
  flag
})
# Verify the recode against the source variable.
table(Mar15$white, Mar15$Grace)

# Hispanic indicator derived from Grace:
#   Grace 3 -> 1 (hispanic); Grace 1, 2, 4 -> 0.
# Values outside 1-4 (including NA) are carried over unchanged.
table(Mar15$Grace)
Mar15$hispanic <- local({
  race_code <- Mar15$Grace
  flag <- race_code
  flag[race_code %in% c(1, 2, 4)] <- 0
  flag[race_code == 3] <- 1
  flag
})
# Verify the recode against the source variable.
table(Mar15$hispanic, Mar15$Grace)


# Female indicator derived from Ggender: code 1 -> 0, code 2 -> 1 (female).
# Any other value (including NA) is carried over unchanged.
table(Mar15$Ggender)
Mar15$female <- local({
  gender_code <- Mar15$Ggender
  flag <- gender_code
  flag[which(gender_code == 1)] <- 0
  flag[which(gender_code == 2)] <- 1
  flag
})
# Verify the recode against the source variable.
table(Mar15$female, Mar15$Ggender)


# Party ID (newer question wording), recoded from Gpid3:
#   1 (Democrat) -> -1, 2 (Republican) -> 1, 3 (independent) and 4 (other) -> 0.
# Values outside 1-4 (including NA) are carried over unchanged.
table(Mar15$Gpid3)
Mar15$pid5_new <- local({
  pid_raw <- Mar15$Gpid3
  pid <- pid_raw
  pid[pid_raw == 1] <- -1
  pid[pid_raw == 2] <- 1
  pid[pid_raw %in% c(3, 4)] <- 0
  pid
})
table(Mar15$pid5_new)


# Partisan lean, recoded from Gindlean on the same sign convention:
#   1 (lean Democrat) -> -1, 2 (lean Republican) -> 1, 3 (neither) -> 0.
table(Mar15$Gindlean)
Mar15$leaner_new <- local({
  lean_raw <- Mar15$Gindlean
  lean <- lean_raw
  lean[lean_raw == 1] <- -1
  lean[lean_raw == 2] <- 1
  lean[lean_raw == 3] <- 0
  lean
})
table(Mar15$leaner_new)


#3-point with leaners as partisans
# PID3_lean_new is built first because Democrats (below) is derived from it.
# Previously Democrats was computed before this column was created in the script.
# Coding of PID3_lean_new (note the sign is the reverse of pid5_new / leaner_new,
# where -1 = Democrat):
#    1 = Democrat or Democratic leaner
#    0 = no lean
#   -1 = Republican or Republican leaner
# NOTE(review): if 315.dta already ships a PID3_lean_new column with a different
# coding, it is overwritten here -- confirm which coding downstream files expect.
Mar15$PID3_lean_new <- Mar15$leaner_new
Mar15$PID3_lean_new[Mar15$leaner_new == -1 | Mar15$pid5_new == -1] <- 1 #dem
Mar15$PID3_lean_new[Mar15$leaner_new == 0] <- 0 #ind
Mar15$PID3_lean_new[Mar15$leaner_new == 1 | Mar15$pid5_new == 1] <- -1 #rep
table(Mar15$PID3_lean_new)
table(Mar15$PID3_lean_new, Mar15$pid5_new)
table(Mar15$PID3_lean_new, Mar15$leaner_new)

# Democrat indicator: 1 for Democrats (leaners included), 0 otherwise.
# It follows the PID3_lean_new coding defined just above (1 = Democrat). The
# earlier version flagged PID3_lean_new == -1, which under that coding selects
# Republicans.
Mar15$Democrats <- Mar15$PID3_lean_new
Mar15$Democrats[Mar15$PID3_lean_new == 1] <- 1 #dem
Mar15$Democrats[Mar15$PID3_lean_new == 0] <- 0 #ind
Mar15$Democrats[Mar15$PID3_lean_new == -1] <- 0 #rep
table(Mar15$Democrats)

# SUPPORT
#
# Three outcome items (Gcontrol, Ggoodjust, Gfavor) are each converted to their
# numeric code and rescaled to the 0-1 interval:
#   1 -> 1 (strongly favor), 2 -> 0.75 (favor), 3 -> 0.5 (neither),
#   4 -> 0.25 (oppose), 5 -> 0 (strongly oppose), 6 -> NA (don't know).
# Codes outside 1-6 (and NA) are carried over unchanged. Mar15$argnum is a
# scratch column that is overwritten for each item.

# Outcome asked in the control condition (raw distribution shown for pid5_new == 1).
table(Mar15$Gcontrol[Mar15$pid5_new==1])
Mar15$argnum <- as.numeric(Mar15$Gcontrol)
table(Mar15$argnum)
Mar15$SuppCont <- local({
  item_code <- Mar15$argnum
  rescaled <- item_code
  scale_points <- c(1, .75, .5, .25, 0, NA)
  for (k in seq_along(scale_points)) {
    rescaled[item_code == k] <- scale_points[k]
  }
  rescaled
})
table(Mar15$SuppCont, Mar15$Gcontrol)



# Outcome built from Ggoodjust.
table(Mar15$Ggoodjust)
Mar15$argnum <- as.numeric(Mar15$Ggoodjust)
table(Mar15$argnum)
Mar15$SuppPerStory <- local({
  item_code <- Mar15$argnum
  rescaled <- item_code
  scale_points <- c(1, .75, .5, .25, 0, NA)
  for (k in seq_along(scale_points)) {
    rescaled[item_code == k] <- scale_points[k]
  }
  rescaled
})
table(Mar15$SuppPerStory, Mar15$Ggoodjust)

# Outcome built from Gfavor.
table(Mar15$Gfavor)
Mar15$argnum <- as.numeric(Mar15$Gfavor)
table(Mar15$argnum)
Mar15$BidenFavor <- local({
  item_code <- Mar15$argnum
  rescaled <- item_code
  scale_points <- c(1, .75, .5, .25, 0, NA)
  for (k in seq_along(scale_points)) {
    rescaled[item_code == k] <- scale_points[k]
  }
  rescaled
})
table(Mar15$BidenFavor, Mar15$Gfavor)

# Treatment-arm indicators from the numeric code of Gtreatment
# (code 1 = control, code 2 = story). Other codes and NA are carried over unchanged.

# StoryDummy: 1 for the story arm, 0 for control.
table(Mar15$Gtreatment)
Mar15$argnum <- as.numeric(Mar15$Gtreatment)
table(Mar15$argnum)
Mar15$StoryDummy <- local({
  arm_code <- Mar15$argnum
  flag <- arm_code
  flag[which(arm_code == 2)] <- 1
  flag[which(arm_code == 1)] <- 0
  flag
})

# ControlDummy: the complement, 1 for control and 0 for the story arm.
table(Mar15$Gtreatment)
Mar15$argnum <- as.numeric(Mar15$Gtreatment)
table(Mar15$argnum)
Mar15$ControlDummy <- local({
  arm_code <- Mar15$argnum
  flag <- arm_code
  flag[which(arm_code == 1)] <- 1
  flag[which(arm_code == 2)] <- 0
  flag
})

#######################################
# WEIGHT DATASET BY GENDER, AGE, RACE
#######################################


#CENSUS - CURRENT POPULATION SURVEY
#percent voted in 2012
# AA voting pop = 0.1339847 (c2)
# His Voting pop = 0.0841532 (c3)
# whi Voting pop = 0.7374387(c1)
# Oth Voting pop = 0.0444234 (c4)

#CENSUS - CURRENT POPULATION SURVEY
#percent voted in 2012
# fem = 0.5270833
# mal = 0.4729167


#CENSUS - CURRENT POPULATION SURVEY
#percent voted in 2012
#18-24 = 0.09 
#25-64 = 0.69 
#65+ = 0.22


# Unweighted survey design object: no clustering (ids = ~1), no weights supplied.
# This is the starting point that rake() adjusts.
Mar15.svy.unweighted <- svydesign(ids = ~1, data = Mar15)
summary(Mar15.svy.unweighted)


# Population target margins. Each data frame pairs the category codes of one
# survey variable with a target count, obtained by scaling the population
# proportion for that category to the sample size.
gender.dist <- data.frame(
  Ggender = as.character(1:2),
  Freq = c(0.4729167, 0.5270833) * nrow(Mar15)
)

age.dist <- data.frame(
  Gage2 = as.character(1:3),
  Freq = c(0.09, 0.69, .22) * nrow(Mar15)
)

race.dist <- data.frame(
  Grace = as.character(1:4),
  Freq = c(0.7374387, 0.1339847, 0.0841532, 0.0444234) * nrow(Mar15)
)




# Rake the unweighted design to the population margins.
#   design             = the unweighted survey design object (Mar15.svy.unweighted)
#   sample.margins     = formulas naming the survey variables to rake on
#   population.margins = the matching target data frames (category codes + Freq)
# The result is the weighted design.
# NOTE(review): only gender and race are raked on here; age.dist is built in
# this script but is not passed to rake(). Confirm whether leaving age out of
# the weights is intentional.
Mar15.svy.rake <- rake(
  Mar15.svy.unweighted,
  sample.margins = list(~Ggender, ~Grace),
  population.margins = list(gender.dist, race.dist)
)

# Summary of the raked design, which now carries the weights.
summary(Mar15.svy.rake)

# Unweighted vs. weighted distribution: gender
prop.table(table(Mar15$Ggender))
prop.table(svytable(~Ggender, Mar15.svy.rake))

# Unweighted vs. weighted distribution: age group
prop.table(table(Mar15$Gage2))
prop.table(svytable(~Gage2, Mar15.svy.rake))

# Unweighted vs. weighted distribution: race
prop.table(table(Mar15$Grace))
prop.table(svytable(~Grace, Mar15.svy.rake))



# Distribution of the individual weights. A large maximum means a single
# respondent is being weighted up heavily, which we want to avoid.
# Rule of thumb used here: no lower than .5 and no higher than 2.5.
summary(weights(Mar15.svy.rake))

# Weighted sample sizes by treatment arm.
svytable(~Gtreatment, Mar15.svy.rake)